﻿/*  
cd /projects/hsieh_project/proj_201809/code_1_data/
qsas data_1_sales_cityind_sum.sas 8 &
*/

libname hr "/projects/hsieh_project/proj_201809/data/";

%Let dir_out = /projects/hsieh_project/proj_201809/data/;

/*
================================================================================
Load raw data 
*/

%include "/projects/hsieh_project/proj_201809/code_1_data/m_read_sales.sas" /source2;

%m_read_sales(param_dev=0, param_lyear=0, param_czone=1, param_msa1983=1, param_msacz=1, param_drop=1);

/*
================================================================================
Calculate concentration at the city-industry level
*/

%macro cind_sum(dt_in= , dt_out= , c_city= , l_ind_perc= , l_cind_rank= );

%put Top Industry Firms;
%put Input &dt_in.;
%put Output &dt_out.;
%put City &c_city.;
%put Percentile &l_ind_perc.;

/* Drop emp-based perc and rank. Rename the sales-based perc and rank. */
data dt_proc;
  set &dt_in.(keep=year &c_city. ch_ind sector firmnum lbdid sales sales_indf_rank sales_indf_perc runif);
  rename sales_indf_rank=indf_rank sales_indf_perc=indf_perc;
run;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry-city-firm level 
*/

proc sort data=dt_proc;
  by year sector ch_ind &c_city. firmnum indf_rank indf_perc;
run;

proc means data=dt_proc noprint;
  by year sector ch_ind &c_city. firmnum indf_rank indf_perc;
  output out=dt_indf(drop=_type_ _freq_) sum(sales)=sales_cind mean(runif)=runif;
run;
/* Note that *_cind will become city-industry level values in the end; currently at city-ind-firm level */

data dt_indf;
  set dt_indf;
  n_cind=1;
  sales2_cindf = sales_cind**2; /* For HHI */
run;


/*
--------------------------------------------------------------------------------
Generate firm rank in city-industry 
*/

proc sort data=dt_indf; by year &c_city. ch_ind indf_rank;

data dt_indf;
  set dt_indf;
  by year &c_city. ch_ind;
  if first.ch_ind then cindf_rank=0;
  cindf_rank+1;
run;

%Let l_cind_var_0=sales_cind n_cind sales2_cindf;

/* 
--------------------------------------------------------------------------------
Create variables related to top industry firms
e.g. sales of top 10% industry-firms in the city (percentile based on sales and defined at industry level)
*/
%Let l_ind_var= ;

%Let i_list = 1;
%do %while (%scan(%bquote(&l_ind_perc.), &i_list) ~= );
  %Let i_perc=%scan(%bquote(&l_ind_perc.), &i_list.);
  %put Top &i_perc.% Industry Firms;
  
  data dt_indf;
    set dt_indf;
    sales_ind_&i_perc. = 0;
    n_ind_&i_perc. = 0;
  run;
  
  data dt_indf;
    set dt_indf;
    if indf_perc <= %sysevalf(&i_perc. / 100) then 
      do;
	sales_ind_&i_perc. = sales_cind;
	n_ind_&i_perc. = 1;
      end;
  run;
  
  %Let l_ind_var = &l_ind_var. sales_ind_&i_perc. n_ind_&i_perc.;
  %Let i_list = %eval(&i_list + 1);
%end;

%put List of Variables for Top Industry Firms;
%put &l_ind_var.;

/* 
--------------------------------------------------------------------------------
Create variables related to top city-industry firms 
e.g. sales of top 1 city-industry firm in the city (rank based on sales and defined at city-industry level)
*/

%Let l_cind_var= ;

%Let i_list = 1;
%do %while (%scan(%bquote(&l_cind_rank.), &i_list) ~= );
  %Let i_rank=%scan(%bquote(&l_cind_rank.), &i_list.);
  %put Top &i_rank. City-Industry Firms;
  
  data dt_indf;
    set dt_indf;
    sales_cind_t&i_rank. = 0;
    n_cind_t&i_rank. = 0;
  run;
  
  data dt_indf;
    set dt_indf;
    if cindf_rank <= &i_rank. then 
      do;
	sales_cind_t&i_rank. = sales_cind;
	n_cind_t&i_rank. = 1;
      end;
  run;
  
  %Let l_cind_var = &l_cind_var. sales_cind_t&i_rank. n_cind_t&i_rank.;
  %Let i_list = %eval(&i_list + 1);
%end;

%put List of Variables for Top City-Industry Firms;
%put &l_cind_var.;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry-city level 
*/

proc sort data=dt_indf; by year sector ch_ind &c_city.;

proc means data=dt_indf noprint;
  by year sector ch_ind &c_city.;
  output out=&dt_out.(drop=_type_ _freq_) sum(&l_cind_var_0. &l_ind_var. &l_cind_var.)=&l_cind_var_0. &l_ind_var. &l_cind_var.;
run;

%mend;

/*
================================================================================
Main macro that calls the previous macro for a given geo definition
*/

%macro m_main(vgeo= );

%put --------------------------------------------------------------------------------;
%put Geo Variable: &vgeo.;

%cind_sum(dt_in=lbd, dt_out=lbd_cind_&vgeo., c_city=&vgeo., l_ind_perc=10 5 1, l_cind_rank=1);

data lbd_cind_&vgeo.;
  set lbd_cind_&vgeo.;
  drop _type_ _freq_;
run;

proc export data=lbd_cind_&vgeo. outfile="&dir_out./sales_cityind_sum_&vgeo..dta" replace;
run;

%mend;

/*
================================================================================
Loop over geo definitions
*/

%m_main(vgeo=msa);
%m_main(vgeo=fips);
%m_main(vgeo=czone);
%m_main(vgeo=msa1983);
%m_main(vgeo=msa1983cz);
%m_main(vgeo=msa2010cz);

/* End of SAS file */
